pacman::p_load(dplyr, ggplot2, tm, SnowballC, wordcloud2, RColorBrewer, 
               plotly, stringr, d3heatmap, htmlwidgets)


【A】News Article Corpus Summary

# Restore the objects saved in data/X.rdata into the workspace. The rest of
# this report relies on that file providing a data frame named `X`.
load(file = "data/X.rdata")
# Print a per-column overview of X.
summary(object = X)
     url                                  sub            date           
 Length:10760       Business & Finance      :2249   Min.   :2010-02-17  
 Class :character   R&D                     :1857   1st Qu.:2013-06-03  
 Mode  :character   Grid Connection         :1319   Median :2015-03-23  
                    Authorities             :1131   Mean   :2015-03-05  
                    Technology              :1077   3rd Qu.:2017-03-02  
                    Operations & Maintenance: 947   Max.   :2019-04-12  
                    (Other)                 :2180                       
    title             abstract            author              tags          
 Length:10760       Length:10760       Length:10760       Length:10760      
 Class :character   Class :character   Class :character   Class :character  
 Mode  :character   Mode  :character   Mode  :character   Mode  :character  
                                                                            
                                                                            
                                                                            
                                                                            
     text                rov        
 Length:10760       Min.   : 0.000  
 Class :character   1st Qu.: 0.000  
 Mode  :character   Median : 0.000  
                    Mean   : 0.033  
                    3rd Qu.: 0.000  
                    Max.   :13.000  
                                    
# Histogram of the number of articles per calendar year.
# Text is shrunk and the bottom margin set to 6 lines so the perpendicular
# (las = 2) axis labels fit below the plot.
par(cex = 0.8, mar = c(6, 4, 4, 2))
hist(
  X$date,
  breaks = "year",  # one bin per calendar year
  freq = TRUE,      # plot counts, not densities; `T` is avoided as it can be reassigned
  main = "No. Articles per Year",
  las = 2,
  xlab = ""
)

# Horizontal bar chart of the number of articles per subject.
# The left margin is widened to 12 lines to make room for the subject labels.
par(cex = 0.8, mar = c(4, 12, 4, 2))
table(X$sub) %>%
  sort() %>%  # ascending counts, so the most frequent subject is drawn last
  barplot(
    las = 2,
    horiz = TRUE,  # spelled out; `T` can be reassigned
    main = "No. Articles per Subject",
    xlab = "freq"
  )

# Stacked bar chart of article counts per year and subject, rendered as an
# interactive plotly widget. position = "fill" rescales every year's stack to
# the same height, so the bars show each subject's share, not raw counts.
p <- X %>%
  mutate(year = as.integer(format(date, "%Y"))) %>%
  # count(year, sub) tallies rows per (year, sub) pair and returns an
  # ungrouped result, unlike group_by() %>% count(), which stays grouped.
  count(year, sub) %>%
  ggplot(aes(x = year, y = n, fill = sub)) +
  geom_bar(stat = "identity", position = "fill") +
  scale_x_continuous(breaks = 2009:2019)
ggplotly(p)


【B】Wordcloud by Subject

Business & Finance
1.png

1.png



R&D
2.png

2.png



Operations & Maintenance
3.png

3.png



Technology
4.png

4.png



Vessels
5.png

5.png





【C】ROV

# Count, for every article, how many times " ROV" or " ROUV" occurs in its text.
# NOTE(review): the pattern is case-sensitive and requires a leading space, so
# it misses mentions at the very start of a text or after "(" and also
# matches longer words beginning with "ROV" -- confirm this is intended.
X$rov <- str_count(string = X$text, pattern = " ROV| ROUV")
# Sum the per-article counts by subject (rows) and publication year (columns).
rx <- xtabs(formula = rov ~ sub + format(date, "%Y"), data = X)
rx
                          format(date, "%Y")
sub                        2010 2011 2012 2013 2014 2015 2016 2017 2018 2019
  Authorities                 0    0    0    0    0    0    1    1    0    0
  Business & Finance          0    0    0    5   32    7   21    2   10    0
  Contracts & Tenders         0    0    0    0    0    0    0    5    3    1
  Environment                 0    0    0    0    0    0    0    0    1    0
  Grid Connection             0    9   13   12    1    1    5    8    2    0
  Industry Contribution       0    0    0    0    0    0    0    0    0    0
  Jobs & Recruitment          0    0    0    0    0    0    0    0    0    0
  Operations & Maintenance    0    1    7    1   10    9    3    6   11    2
  Ports & Logistics           0    0    1    0    0    0    0    1    0    0
  R&D                         0    0    3    8   16    7   15    3    0    0
  Technology                  3   11    2    8   13    5    6    8    8    0
  Training & Education        0    0    2    0   14    1    0    0    0    0
  Vessels                     0    0    0    0    1   11    7    6    5    0
  Wind Farm Update            0    0    0    0    0    0    0    0    0    0
# Interactive heatmap of the subject-by-year ROV mention counts.
# Arguments are named explicitly instead of positional T/F and the partially
# matched `col`. Rowv = TRUE and Colv = FALSE are the two values the original
# call passed positionally; this mapping assumes the CRAN d3heatmap 0.6.x
# argument order (x, Rowv, Colv, ...) -- TODO confirm against the installed version.
rx %>%
  as.data.frame.matrix() %>%
  d3heatmap(Rowv = TRUE, Colv = FALSE, colors = "Greens")


ROV: Business & Finance
6.png

6.png



ROV: R&D
7.png

7.png



ROV: Operations & Maintenance
8.png

8.png



ROV: Technology
9.png

9.png



ROV: Vessels
10.png

10.png